from gxl_ai_utils.utils import utils_file

# Build a per-task catalogue of datasets from a YAML config and dump it as JSON.
#
# The YAML file is read as a mapping of dataset name -> entry, where each
# entry is indexed by the keys 'path' and 'task'. The resulting JSON has the
# layout  {task: {dataset_name: info_record}}  and additionally lists every
# dataset a second time under the synthetic task "ALL".

yaml_path = "./data_config_huawei_cosyvoice1-token.yaml"
output_path = "./data_info.json"


def _build_info(data_name, task, huawei_shards_path):
    """Return the catalogue record for one dataset under one task.

    Only the name, task and Huawei shard-list path come from the config;
    the remaining fields are written as placeholders ('-' / -1).
    """
    return {
        "dataset_name": data_name,
        "task": task,
        "shards_list_path_huawei": huawei_shards_path,
        "shards_list_path_lab": '-',
        "data_list_path_lab": '-',
        "description": '-',
        "duration": -1,
    }


def _register_all(catalogue, config, task_override=None):
    """Insert one record per entry of ``config`` into ``catalogue`` in place.

    Args:
        catalogue: Nested dict ``{task: {dataset_name: record}}`` to update.
        config: Mapping of dataset name -> entry; ``entry['path']`` is always
            read, ``entry['task']`` only when no override is given.
        task_override: If not None, every record is filed under this task
            instead of the entry's own ``'task'`` value.

    Raises:
        KeyError: If an entry lacks a key that has to be read.
    """
    for data_name, entry in config.items():
        huawei_shards_path = entry['path']
        task = entry['task'] if task_override is None else task_override
        # setdefault creates the per-task bucket on first use; an existing
        # record for the same dataset name is simply replaced.
        catalogue.setdefault(task, {})[data_name] = _build_info(
            data_name, task, huawei_shards_path
        )


data_config = utils_file.load_dict_from_yaml(yaml_path)
print(data_config)

# Start from an empty catalogue. To extend a previously written file instead,
# replace the next line with:
#     big_dict = utils_file.load_dict_from_json(output_path)
big_dict = {}

# Two separate passes (rather than one merged loop) so that the real tasks
# appear first in the output and the "ALL" bucket is appended after them.
_register_all(big_dict, data_config)
_register_all(big_dict, data_config, task_override="ALL")

utils_file.write_dict_to_json(big_dict, output_path)
